*******************************************************
* Title: rwanda_teacher_compliance_jde.do
* Author: Todd Pugatch
* Last update: June 10 2024
*
* Description: data cleaning for Blimpo and Pugatch, "Entrepreneurship Education
*	and Teacher Training in Rwanda," Stage 2 Registered report, Journal of 
*	Development Economics
*
* Inputs: 	rwanda_teacher_trainattend_v1.dta
*			headteacher_baseline_clean_jde.dta
* Outputs: 	rwanda_school_trainattend_jde.dta 
*
* Notes: cleans teacher administrative takeup data
*******************************************************

* set environment
/*Record the current time in local `start', then reset the Stata session:
	data, matrices, Mata, graphs, programs, and any open (unnamed) log.*/
local start=`"$S_TIME"'
clear
clear matrix
clear mata
graph drop _all
program drop _all
cap log close
set more off

* Set directories 
/*Every path below is built from global main. Define it either by uncommenting
	the next line or in a calling do-file before this script is run.*/
*global main "[SET MAIN DIRECTORY HERE]"
* stop with a clear message if main is undefined; otherwise the paths below
* would silently resolve relative to the filesystem root
if `"$main"'=="" {
	di as error "global main is not set: define the project root directory before running this do-file"
	exit 198
}
	global rawdata "$main/01_data/01_raw"
	global cleandata "$main/01_data/02_clean"
	global dofiles "$main/02_dofiles"
	global results "$main/03_results"
	global temp "$main/04_output"
	
	
/******************************************************************
			CLEAN TEACHER-LEVEL DATA
Training attendance is recorded daily; exchange visit attendance is
	recorded by term. For each training, build two teacher-level
	indicators: 1) attended on any day and 2) attended on all days.
	The data are collapsed by school in the next section.
*******************************************************************/
* read in teacher-level attendance records
qui use "$rawdata/teacher/rwanda_teacher_trainattend_v1.dta", clear

* alphabetize training and exchange variables so the d1-d4 ranges below
* refer to adjacent columns
aorder training* exchange* 

* terms in which a training was held, by year
local terms_2016 "2 3"
local terms_2017 "1 2 3"
local terms_2018 "1"

forval yr=2016/2018 {
	foreach t of local terms_`yr' {
		local stub "training_`yr'_T`t'"
		* any: at least one of the daily variables equals 1
		qui egen `stub'_any=anymatch(`stub'_d1-`stub'_d4), v(1)
		* allx: number of daily variables equal to 1 (helper, dropped below)
		qui egen `stub'_allx=anycount(`stub'_d1-`stub'_d4), v(1)
		* all: all four daily variables equal 1
		qui gen `stub'_all=(`stub'_allx==4)
		lab var `stub'_any "attended any day of training, `yr' Term `t'"
		lab var `stub'_all "attended all days of training, `yr' Term `t'"
	}
}
* remove the helper day counts
drop *allx

/*COLLAPSE BY SCHOOL*/
/*Collapsed data will be wide by school, with information on training and exchange visit
	attendance by term, and overall. A school gets a 1 if any teacher from the school
	attended the training or exchange visit (maximum across the school's teachers).
	district* and province* are averaged across teachers within school.*/
qui collapse (mean) district* province* ///
	(max) training*any training*all exchange*, by(school_id)

* label variables (collapse replaces the teacher-level labels)
local terms_2016 "2 3"
local terms_2017 "1 2 3"
local terms_2018 "1"
forval yr=2016/2018 {
	foreach t of local terms_`yr' {
		lab var training_`yr'_T`t'_any "teacher from school attended any day of training, `yr' Term `t'"
		lab var training_`yr'_T`t'_all "teacher from school attended all days of training, `yr' Term `t'"
		lab var exchange_`yr'_T`t' "teacher from school attended exchange visit, `yr' Term `t'"
	}
}

* create summary measures of takeup across all terms
/*Alphabetize so that each first-last variable range used in rowtotal() below
	spans the term-level variables of a single measure.*/
aorder training*any exchange*	
aorder training*all

* number of terms with a training / exchange visit (2016 T2 through 2018 T1)
local nterms 6

* training: count and share of terms attended, by any-day and all-days measure
foreach kind in any all {
	qui egen training_`kind'_total=rowtotal(training_2016_T2_`kind'-training_2018_T1_`kind')
	lab var training_`kind'_total "# of trainings (of 6) attended by school on `kind' day(s), 2016-2018"
	qui gen training_`kind'_pct=training_`kind'_total/`nterms'
	lab var training_`kind'_pct "% of trainings (of 6) attended by school on `kind' day(s), 2016-2018"
}

* training: indicator for attending at least one day in any term
qui gen training_any=(training_any_total!=. & training_any_total>0)
lab var training_any "at least 1 day of training attended by school, 2016-2018"

* exchange visits: count, share, and indicator for at least one visit
qui egen exchange_total=rowtotal(exchange_2016_T2-exchange_2018_T1)
lab var exchange_total "# of exchange visits (of 6) attended by school, 2016-2018"
qui gen exchange_pct=exchange_total/`nterms'
lab var exchange_pct "% of exchange visits (of 6) attended by school, 2016-2018"
qui gen exchange_any=(exchange_total!=. & exchange_total>0)
lab var exchange_any "at least 1 exchange visit attended by school, 2016-2018"

* merge with school treatment status
/*To compare school id's and names across datasets, use "school_code" and 
	"school" from headteacher_baseline_clean_jde.dta and spreadsheet 
	2016_2017_2018_RWANDA_teacherlist.xlsx. They appear to match up.*/
* numeric merge key; ids that cannot be converted become missing and are dropped
qui destring school_id, gen(school_code) force
qui drop if school_code==.
qui merge 1:1 school_code using "$cleandata/headteacher_baseline_clean_jde.dta", keepusing(treatment* school strata)
lab var _merge "merge between E! administrative compliance data and school treatment status"

* impute zeroes to schools in sample without compliance data
/*Collect every takeup variable (term-level, totals, shares, indicators), then
	set it to 0 wherever it is missing and treatment is not missing.*/
local terms_2016 "2 3"
local terms_2017 "1 2 3"
local terms_2018 "1"
local tofill ""
* term-level variables
forval yr=2016/2018 {
	foreach t of local terms_`yr' {
		local tofill "`tofill' training_`yr'_T`t'_any training_`yr'_T`t'_all exchange_`yr'_T`t'"
	}
}
* summary counts and shares
foreach stat in pct total {
	local tofill "`tofill' exchange_`stat' training_any_`stat' training_all_`stat'"
}
* summary indicators
local tofill "`tofill' exchange_any training_any"

foreach v of local tofill {
	qui replace `v'=0 if `v'==. & treatment!=.
}

* clean and save
/*keep only schools in study: those found in the treatment-status file,
	whether or not they have compliance data*/
qui keep if inlist(_merge,2,3)
qui compress
lab data "E! Training & Exchange Visit Attendance, school level, 2016 Term 2-2018 Term 1"
qui save "$cleandata/rwanda_school_trainattend_jde.dta", replace

* display start and end times of the run
local end=`"$S_TIME"' 
di "`start'"
di "`end'"
